library(reshape2) # melt function
library(ggplot2) # ggplot function
library(pcaPP) # Fast Kendall function
library(energy) # Distance Correlation
library(Hmisc) # Hoeffding's D measure
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(entropy) # Mutual Information
library(minerva) # Maximum Information Coefficient
library(XICOR) # Chatterjee's Coefficient
library(dHSIC) # Hilbert Schmidt Independence Criterion
library(VineCopula) # Blomqvist's Beta

source("data_generation.R")
generate_quadratic <- function(n){
  t(sapply(1:n, function(i){
    x <- stats::runif(1, min = -1, max = 1)
    y <- x^2
    c(x,y)
  }))
}

generate_quadratic2 <- function(n){
  t(sapply(1:n, function(i){
    x <- stats::runif(1, min = -1, max = 1)
    y <- x^2 + stats::rnorm(1, mean = 0, sd = 0.3)
    c(x,y)
  }))
}

gen_vertical1 <- function(n){
  t(sapply(1:n, function(i){
    x <- stats::runif(1, min = -2, max = 2)
    y <- 0 
    c(x,y)
  }))
}

gen_vertical2 <- function(n){
  t(sapply(1:n, function(i){
    x <- 0
    y <- stats::runif(1, min = -2, max = 2)
    c(x,y)
  }))
}

generate_power <- function(n){
  t(sapply(1:n, function(i){
    x <- stats::runif(1, min = 0, max = 2)
    y <- x^10 + stats::rnorm(1, mean = 10, sd = 10)
    c(x,y)
  }))
}

gen_linear <- function(n){
  t(sapply(1:n, function(i){
    # generate "cluster" assignment
    x <- stats::rnorm(1, mean = 2.5, sd = 1)
    y <- x + stats::rnorm(1, mean = 0, sd = 1)
    c(x,y)
  }))
}

gen_strong_linear <- function(n){
  t(sapply(1:n, function(i){
    # generate "cluster" assignment
    x <- stats::rnorm(1, mean = 2.5, sd = 1)
    y <- x + stats::rnorm(1, mean = 0, sd = 0.3)
    c(x,y)
  }))
}

gen_sine <- function(n){
  t(sapply(1:n, function(i){
    x <- stats::runif(1, min = -2, max = 12)
    y <- sin(x) + stats::rnorm(1, mean = 0, sd = 0.1)
    c(x,y)
  }))
}
quadratic_1 <- generate_quadratic(100)
plot(quadratic_1)

exper_n <- 500

ver1 <- gen_vertical1(exper_n)
plot(ver1)

ver2 <- gen_vertical2(exper_n)
plot(ver2)

cross <- rbind(ver1, ver2)
plot(cross)

power <- generate_power(exper_n)
plot(power)

quadratic2 <- generate_quadratic2(400)
plot(quadratic2)

sine <- gen_sine(exper_n)
plot(sine)

Dcor vs Pearson

plot(quadratic_1)

energy::dcor(quadratic_1[,1],quadratic_1[,2])
## [1] 0.5239368
stats::cor(quadratic_1, method = "pearson")
##            [,1]       [,2]
## [1,] 1.00000000 0.06383295
## [2,] 0.06383295 1.00000000
#stats::cor(quadratic_1, method = "spearman")

Things to consider

lollipop <- .generate_lollipop(500)
plot(lollipop)

stats::cor(lollipop)
##           [,1]      [,2]
## [1,] 1.0000000 0.8481493
## [2,] 0.8481493 1.0000000
energy::dcor(lollipop[,1],lollipop[,2])
## [1] 0.8722402

XI vs Spearman

Linear

lin <- gen_linear(200)
plot(lin)

XICOR::calculateXI(lin[,1], lin[,2])
## [1] 0.3551339
stats::cor(lin, method = "spearman")
##           [,1]      [,2]
## [1,] 1.0000000 0.6978099
## [2,] 0.6978099 1.0000000

Quadratic

XICOR::calculateXI(quadratic_1[,1], quadratic_1[,2])
## [1] 0.9408941
stats::cor(quadratic_1, method = "spearman")
##           [,1]      [,2]
## [1,] 1.0000000 0.1189439
## [2,] 0.1189439 1.0000000

Sine

XICOR::calculateXI(sine[,1], sine[,2])
## [1] 0.8057552
stats::cor(sine, method = "spearman")
##            [,1]       [,2]
## [1,] 1.00000000 0.03281725
## [2,] 0.03281725 1.00000000

Hoeffding’s D vs XI

quadratic

Hmisc::hoeffd(sine[,1], sine[,2])$D
##            x          y
## x 1.00000000 0.04743196
## y 0.04743196 1.00000000
XICOR::calculateXI(sine[,1], sine[,2])
## [1] 0.8057552
plot(quadratic_1)

Hmisc::hoeffd(quadratic_1[,1], quadratic_1[,2])$D
##           x         y
## x 1.0000000 0.2475974
## y 0.2475974 1.0000000
XICOR::calculateXI(quadratic_1[,1], quadratic_1[,2])
## [1] 0.9408941

Quadratic with noise

plot(quadratic2)

Hmisc::hoeffd(quadratic2[,1], quadratic2[,2])$D
##            x          y
## x 1.00000000 0.04383696
## y 0.04383696 1.00000000
XICOR::calculateXI(quadratic2[,1], quadratic2[,2])
## [1] 0.2730205

Beta vs Spearman

steep exponential

plot(power)

stats::cor(power, method = "spearman")
##           [,1]      [,2]
## [1,] 1.0000000 0.7451254
## [2,] 0.7451254 1.0000000
VineCopula::BetaMatrix(power)
##       [,1]  [,2]
## [1,] 1.000 0.452
## [2,] 0.452 1.000
gen_quadrant <- function(n){
  t(sapply(1:n, function(i){
    rand_int <- sample(1:4, 1)
    if (rand_int == 1){
      x <- stats::runif(1, min = 0, max = 3)
      y <- stats::runif(1, min = 0, max = 3)
      
    } else if (rand_int == 2){
      x <- stats::runif(1, min = 0, max = 4)
      y <- stats::runif(1, min = 10, max = 14)
      
    } else if (rand_int == 3){
      x <- stats::runif(1, min = 10, max = 14)
      y <- stats::runif(1, min = 0, max = 4)
      
    } else {
      x <- stats::runif(1, min = 10, max = 14)
      y <- stats::runif(1, min = 10, max = 14)
    }
    c(x,y)
  }))
}

quadrant <- gen_quadrant(exper_n)

mean(quadrant[, 1])
## [1] 6.540044
mean(quadrant[, 2])
## [1] 6.86236
plot(quadrant)

Beta vs MI

VineCopula::BetaMatrix(quadrant)
##       [,1]  [,2]
## [1,] 1.000 0.708
## [2,] 0.708 1.000
mi.empirical(entropy::discretize2d(as.matrix(quadrant[, 1]), as.matrix(quadrant[, 2]), numBins1 = 20, numBins2 = 20))
## Warning in KL.plugin(freqs2d, freqs.null, unit = unit): Vanishing value(s) in
## argument freqs2!
## [1] 0.1956087

Beta vs Kendall

plot(quadrant)

VineCopula::BetaMatrix(quadrant)
##       [,1]  [,2]
## [1,] 1.000 0.708
## [2,] 0.708 1.000
stats::cor(quadrant, method = "kendall")
##            [,1]       [,2]
## [1,] 1.00000000 0.07547896
## [2,] 0.07547896 1.00000000
#minerva::mine(quadrant)$MIC
plot(lin)

VineCopula::BetaMatrix(lin)
##      [,1] [,2]
## [1,] 1.00 0.87
## [2,] 0.87 1.00
stats::cor(lin, method = "kendall")
##           [,1]      [,2]
## [1,] 1.0000000 0.5114573
## [2,] 0.5114573 1.0000000
#minerva::mine(quadrant)$MIC

MIC vs MI

plot(quadratic_1)

minerva::mine(quadratic_1)$MIC
##      [,1] [,2]
## [1,]    1    1
## [2,]    1    1
mi.empirical(entropy::discretize2d(as.matrix(quadratic_1[, 1]), as.matrix(quadratic_1[, 2]), numBins1 = 10, numBins2 = 10))
## [1] 1.285651

Explore more about the lollipop

original <- .generate_lollipop
set.seed(11)
lollipop <- original(500)
plot(lollipop)

stats::cor(lollipop, method = "pearson")[1,2]
## [1] 0.8536084
energy::dcor(lollipop[,1],lollipop[,2])
## [1] 0.8884833
XICOR::calculateXI(lollipop[,1], lollipop[,2])
## [1] 0.5954904
stats::cor(lollipop, method = "spearman")[1,2]
## [1] 0.8496716
Hmisc::hoeffd(lollipop[,1], lollipop[,2])$D
##           x         y
## x 1.0000000 0.4040181
## y 0.4040181 1.0000000
XICOR::calculateXI(lollipop[,1], lollipop[,2])
## [1] 0.5954904
stats::cor(lollipop, method = "spearman")[1,2]
## [1] 0.8496716
VineCopula::BetaMatrix(lollipop)[1,2]
## [1] 0.592
VineCopula::BetaMatrix(lollipop)[1,2]
## [1] 0.592
stats::cor(lollipop, method = "kendall")[1,2]
## [1] 0.6607615
new_lollipop1 <- function(n){
  t(sapply(1:n, function(i){
    # generate "cluster" assignment
    k <- sample(1:3, 1)
    
    # generate "ball"
    if(k == 1){
      x <- stats::rnorm(1, mean = 0, sd = 1)
      y <- stats::rnorm(1, mean = 0, sd = 1)
    
      # generate "bridge"
    } else if (k == 2){
      x <- stats::rnorm(1, mean = 1, sd = 0.5)
      y <- stats::rnorm(1, mean = 2, sd = 0.5)
      
    } else {
      # k == 3, generate "stick"
      x <- stats::rnorm(1, mean = 3, sd = 0.8)
      y <- x + stats::rnorm(1, mean = 1, sd = 0.5)
    } 
    
    c(x,y)
  }))
}
set.seed(10)
new_pop1 <- new_lollipop1(500)
plot(new_pop1)

stats::cor(new_pop1, method = "pearson")[1,2]
## [1] 0.8219725
energy::dcor(new_pop1[,1],new_pop1[,2])
## [1] 0.8336293
XICOR::calculateXI(new_pop1[,1], new_pop1[,2])
## [1] 0.5233341
stats::cor(new_pop1, method = "spearman")[1,2]
## [1] 0.8256502
Hmisc::hoeffd(new_pop1[,1], new_pop1[,2])$D
##           x         y
## x 1.0000000 0.3500869
## y 0.3500869 1.0000000
XICOR::calculateXI(new_pop1[,1], new_pop1[,2])
## [1] 0.5233341
stats::cor(new_pop1, method = "spearman")[1,2]
## [1] 0.8256502
VineCopula::BetaMatrix(new_pop1)[1,2]
## [1] 0.576
VineCopula::BetaMatrix(new_pop1)[1,2]
## [1] 0.576
stats::cor(new_pop1, method = "kendall")[1,2]
## [1] 0.6452745
set.seed(10)
new_lollipop2 <- function(n){
  t(sapply(1:n, function(i){
    # generate "cluster" assignment
    k <- sample(1:2, 1)
    
    # generate "ball"
    if(k == 1){
      x <- stats::rnorm(1, mean = 0, sd = 1)
      y <- stats::rnorm(1, mean = 0, sd = 1)
    } else {
      # k == 2, generate "stick"
      x <- stats::rnorm(1, mean = 1, sd = 0.8)
      y <- x + stats::rnorm(1, mean = 0.5, sd = 0.5)
    }
        
    c(x,y)
  }))
}
new_pop2 <- new_lollipop2(500)
plot(new_pop2)

stats::cor(new_pop2, method = "pearson")[1,2]
## [1] 0.5238693
energy::dcor(new_pop2[,1],new_pop2[,2])
## [1] 0.5163513
XICOR::calculateXI(new_pop2[,1], new_pop2[,2])
## [1] 0.1993448
stats::cor(new_pop2, method = "spearman")[1,2]
## [1] 0.5289624
Hmisc::hoeffd(new_pop2[,1], new_pop2[,2])$D
##           x         y
## x 1.0000000 0.1187861
## y 0.1187861 1.0000000
XICOR::calculateXI(new_pop2[,1], new_pop2[,2])
## [1] 0.1993448
stats::cor(new_pop2, method = "spearman")[1,2]
## [1] 0.5289624
VineCopula::BetaMatrix(new_pop2)[1,2]
## [1] 0.412
VineCopula::BetaMatrix(new_pop2)[1,2]
## [1] 0.412
stats::cor(new_pop2, method = "kendall")[1,2]
## [1] 0.3859559
set.seed(15)
new_lollipop3 <- function(n){
  t(sapply(1:n, function(i){
    # generate "cluster" assignment
    k <- sample(1:2, 1)
    
    # generate "ball"
    if(k == 1){
      x <- stats::rnorm(1, mean = 0, sd = 1)
      y <- stats::rnorm(1, mean = 0, sd = 1)
      
    } else {
      # k == 2, generate "stick"
      x <- stats::rnorm(1, mean = 3, sd = 0.5)
      y <- x + stats::rnorm(1, mean = 1, sd = 0.5)
    }
        
    c(x,y)
  }))
}
new_pop3 <- new_lollipop3(500)
plot(new_pop3)

stats::cor(new_pop3, method = "pearson")[1,2]
## [1] 0.8528809
energy::dcor(new_pop3[,1],new_pop3[,2])
## [1] 0.8887818
XICOR::calculateXI(new_pop3[,1], new_pop3[,2])
## [1] 0.5629103
stats::cor(new_pop3, method = "spearman")[1,2]
## [1] 0.8437295
Hmisc::hoeffd(new_pop3[,1], new_pop3[,2])$D
##          x        y
## x 1.000000 0.375671
## y 0.375671 1.000000
XICOR::calculateXI(new_pop3[,1], new_pop3[,2])
## [1] 0.5629103
stats::cor(new_pop3, method = "spearman")[1,2]
## [1] 0.8437295
VineCopula::BetaMatrix(new_pop3)[1,2]
## [1] 0.568
VineCopula::BetaMatrix(new_pop3)[1,2]
## [1] 0.568
stats::cor(new_pop3, method = "kendall")[1,2]
## [1] 0.6421323
set.seed(230)
new_lollipop4 <- function(n){
  t(sapply(1:n, function(i){
    # generate "cluster" assignment
    k <- sample(c(1,2), 1)
    
    # generate "ball"
    if(k == 1){
      x <- stats::rnorm(1, mean = 0, sd = 1)
      y <- stats::rnorm(1, mean = 0, sd = 1)
    } else {
      # k == 2, generate "stick"
      x <- stats::rnorm(1, mean = 1, sd = 0.8)
      y <- x + stats::rnorm(1, mean = 1, sd = 0.5)
    }
    
    c(x,y)
  }))
}
new_pop4 <- new_lollipop4(500)
plot(new_pop4)

stats::cor(new_pop4, method = "pearson")[1,2]
## [1] 0.5628619
energy::dcor(new_pop4[,1],new_pop4[,2])
## [1] 0.596334
XICOR::calculateXI(new_pop4[,1], new_pop4[,2])
## [1] 0.2703851
stats::cor(new_pop4, method = "spearman")[1,2]
## [1] 0.6033118
Hmisc::hoeffd(new_pop4[,1], new_pop4[,2])$D
##           x         y
## x 1.0000000 0.1734984
## y 0.1734984 1.0000000
XICOR::calculateXI(new_pop4[,1], new_pop4[,2])
## [1] 0.2703851
stats::cor(new_pop4, method = "spearman")[1,2]
## [1] 0.6033118
VineCopula::BetaMatrix(new_pop4)[1,2]
## [1] 0.396
VineCopula::BetaMatrix(new_pop4)[1,2]
## [1] 0.396
stats::cor(new_pop4, method = "kendall")[1,2]
## [1] 0.4482886